0: Preparation
Defining the input and output files
# Clean the working environment
rm(list = ls())
knitr::opts_chunk$set(echo = TRUE)
empirical_species <- "German Shepherd"
document_title <- paste(empirical_species," Pipeline Results")
MAF_pruning_used <- FALSE
N_e <- 70
document_sub <- paste("MAF 0.05 used, but only for H_E-computation", N_e)
# if (MAF_pruning_used == FALSE) {
# document_sub <- paste("No MAF-based pruning used, N_e =", N_e)
#
#
# } else {
# document_sub <- paste("MAF-based pruning used, N_e =", N_e)
# }
############################################
# Parameters used for displaying the result
############################################
ROH_frequency_decimals <- 1
H_e_values_decimals <- 5
F_ROH_values_decimals <- 3
####################################
# Defining the input files
####################################
Selection_strength_test_dir <- Sys.getenv("Selection_strength_test_dir")
Sweep_test_dir <- Sys.getenv("Sweep_test_dir")
###############
## Empirical ###
###############
### ROH hotspots ###
Empirical_data_ROH_hotspots_dir <- Sys.getenv("Empirical_data_ROH_hotspots_dir")
Empirical_data_autosome_ROH_freq_dir <- Sys.getenv("Empirical_data_autosome_ROH_freq_dir")
### Inbreeding coefficient ###
Empirical_data_F_ROH_dir <- Sys.getenv("Empirical_data_F_ROH_dir")
### Expected Heterozygosity distribution ###
Empirical_data_H_e_dir <- Sys.getenv("Empirical_data_H_e_dir")
###############
## Simulated ###
###############
### Causative variant ###
variant_freq_plots_dir <- Sys.getenv("variant_freq_plots_dir")
variant_position_dir <- Sys.getenv("variant_position_dir")
pruned_replicates_count_dir <- Sys.getenv("pruned_replicates_count_dir")
Selection_causative_variant_windows_dir <- Sys.getenv("Selection_causative_variant_windows_dir")
causative_variant_windows_H_e_dir <- Sys.getenv("causative_variant_windows_H_e_dir")
### ROH hotspots ###
Neutral_model_ROH_hotspots_dir <- Sys.getenv("Neutral_model_ROH_hotspots_dir")
Neutral_model_autosome_ROH_freq_dir <- Sys.getenv("Neutral_model_autosome_ROH_freq_dir")
Selection_model_ROH_hotspots_dir <- Sys.getenv("Selection_model_ROH_hotspots_dir")
Selection_model_autosome_ROH_freq_dir <- Sys.getenv("Selection_model_autosome_ROH_freq_dir")
### Inbreeding coefficient ###
Neutral_model_F_ROH_dir <- Sys.getenv("Neutral_model_F_ROH_dir")
Selection_model_F_ROH_dir <- Sys.getenv("Selection_model_F_ROH_dir")
### Expected Heterozygosity distribution ###
Neutral_model_H_e_dir <- Sys.getenv("Neutral_model_H_e_dir")
Selection_model_H_e_dir <- Sys.getenv("Selection_model_H_e_dir")
histogram_line_sizes <- 3
empirical_data_color <- "darkgreen"
neutral_model_color <- "blue"
selection_model_color <- "purple"
output_dir <- Sys.getenv("Pipeline_results_output_dir")
# Sys.getenv()
# # Set the working directory for notebook chunks
# knitr::opts_knit$set(root.dir = output_dir_sweep_test)
Loading libraries
library(knitr)
## Warning: package 'knitr' was built under R version 4.3.2
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.1
Causative variant
Variant fixation
Fixation probability
Fixation time
# Function to determine the number of rows until fixation is reached
time_to_fixation <- function(data, threshold = 0.9) {
# Find the index of the first row where the allele frequency is 0.9 or higher
fixation_index <- which(data$allele_frequency >= threshold)[1]
# Return the number of rows until fixation is reached
return(fixation_index - 1)
}
Summary - Variant fixation
| s=0.1 |
0.4 |
35.55 |
29 |
39 |
| s=0.2 |
9.0 |
33.95 |
24 |
39 |
| s=0.3 |
27.8 |
26.65 |
19 |
34 |
| s=0.4 |
40.0 |
22.30 |
13 |
38 |
| s=0.5 |
51.3 |
14.45 |
10 |
23 |
| s=0.6 |
37.0 |
12.55 |
10 |
18 |
| s=0.7 |
46.5 |
9.95 |
6 |
16 |
| s=0.8 |
42.6 |
7.60 |
5 |
11 |
Causative variant windows
Variant position
Window lengths
Summary - Causative variant windows
| 1 |
s=0.1 |
4.425 |
56.0 |
12.9 |
100 |
57.1 |
| 4 |
s=0.4 |
4.575 |
75.9 |
18.6 |
100 |
76.5 |
| 2 |
s=0.2 |
4.770 |
57.8 |
7.1 |
100 |
58.7 |
| 3 |
s=0.3 |
4.785 |
71.7 |
20.0 |
100 |
72.7 |
| 8 |
s=0.8 |
5.120 |
81.3 |
15.7 |
100 |
81.5 |
| 7 |
s=0.7 |
5.210 |
84.8 |
31.4 |
100 |
85.6 |
| 5 |
s=0.5 |
5.385 |
83.9 |
22.9 |
100 |
85.1 |
| 6 |
s=0.6 |
5.485 |
78.0 |
10.0 |
100 |
78.5 |
Standard Error Confidence interval function
Confidence level <=> konfidensgrad
# Function to calculate standard error and its confidence interval
standard_error_confidence_interval_fun <- function(observed_data, confidence_level = 0.95) {
# Calculate standard error
n <- length(observed_data)
standard_deviation <- sd(observed_data)
standard_error <- standard_deviation / sqrt(n - 1)
# Calculate confidence interval based on standard error
alpha <- (1 - confidence_level) / 2
margin_of_error <- qnorm(1 - alpha) * standard_error
mean_estimate <- mean(observed_data)
# Calculate the percentiles for the confidence interval
confidence_interval_lower_bound <- mean_estimate - margin_of_error # 2.5th percentile (2σ)
confidence_interval_upper_bound <- mean_estimate + margin_of_error # 97.5th percentile (2σ)
# Return confidence interval
return(c(confidence_interval_lower_bound, confidence_interval_upper_bound))
}
# # Function to calculate bootstrap confidence intervals
# standard_error_confidence_interval_fun <- function(observed_data, n_bootstraps = 1000, confidence_level = 0.95) {
#
# # Function to calculate the mean for each bootstrap sample
# compute_bootstrap_mean_fun <- function(observed_data_urn) {
# bootstrap_dataset <- sample(observed_data_urn, replace = TRUE)
# bootstrap_estimate <- mean(bootstrap_dataset)
# return(bootstrap_estimate)
# }
#
# # Perform bootstrap resampling
# bootstrap_point_estimates <- replicate(n_bootstraps, compute_bootstrap_mean_fun(observed_data))
#
# # Calculate the percentiles for the confidence interval
# alpha <- (1 - confidence_level) / 2
# confidence_interval_lower_bound <- quantile(bootstrap_point_estimates, alpha) # 2.5th percentile
# confidence_interval_upper_bound <- quantile(bootstrap_point_estimates, 1 - alpha) # 97.5th percentile
#
# # Return the confidence interval
# return(c(confidence_interval_lower_bound, confidence_interval_upper_bound))
# }
1: ROH-Frequency
1.1 : Autosome ROH-frequencies
1.1.1 : Empirical - ROH frequency
1.1.2: Neutral Model - ROH frequency
1.1.3: Selection Model
1.1.4 Summary - ROH-hotspot threshold
## ROH-hotspot selection testing results://n
| Neutral |
30.5 |
| s=0.1 |
32.2 |
| s=0.2 |
35.1 |
| s=0.3 |
37.3 |
| s=0.4 |
37.9 |
| s=0.6 |
41.4 |
| s=0.7 |
41.8 |
| s=0.5 |
42.6 |
| s=0.8 |
47.6 |
| Empirical |
75.0 |
1.2 ROH-hotspots - ROH Frequency and Length
2: Inbreeding coefficient
2.1 Empirical data

## Overall Average Avg_F_ROH for german_shepherd : 0.2753012 //n
2.2 Neutral Model

## Point estimate F_ROH across all 20 simulations: 0.25007 //n
## [1] "Bootstrap 95% Confidence Interval: [0.23473890934696, 0.265401133510183]"
2.3 Selection Model

## Point estimate F_ROH across all 20 simulations for selection_model_s01_chr3 : 0.2734885 //n[1] "Bootstrap 95% Confidence Interval: [0.259682877404132, 0.287294222595868]"

## Point estimate F_ROH across all 20 simulations for selection_model_s02_chr3 : 0.290046 //n[1] "Bootstrap 95% Confidence Interval: [0.268404249001561, 0.311687808141296]"

## Point estimate F_ROH across all 20 simulations for selection_model_s03_chr3 : 0.3161388 //n[1] "Bootstrap 95% Confidence Interval: [0.294101862625108, 0.338175708803464]"

## Point estimate F_ROH across all 20 simulations for selection_model_s04_chr3 : 0.3145064 //n[1] "Bootstrap 95% Confidence Interval: [0.286793720981368, 0.342219150447203]"

## Point estimate F_ROH across all 20 simulations for selection_model_s05_chr3 : 0.3394231 //n[1] "Bootstrap 95% Confidence Interval: [0.313208544370823, 0.36563759848632]"

## Point estimate F_ROH across all 20 simulations for selection_model_s06_chr3 : 0.348661 //n[1] "Bootstrap 95% Confidence Interval: [0.319210697417095, 0.378111345440048]"

## Point estimate F_ROH across all 20 simulations for selection_model_s07_chr3 : 0.3598673 //n[1] "Bootstrap 95% Confidence Interval: [0.337347371590621, 0.382387271266521]"

## Point estimate F_ROH across all 20 simulations for selection_model_s08_chr3 : 0.3911701 //n[1] "Bootstrap 95% Confidence Interval: [0.350246216491064, 0.432094012080364]"
2.4 F_ROH summary
| Neutral |
0.250 |
0.235 |
0.265 |
| s=0.1 |
0.273 |
0.260 |
0.287 |
| Empirical |
0.275 |
NA |
NA |
| s=0.2 |
0.290 |
0.268 |
0.312 |
| s=0.4 |
0.315 |
0.287 |
0.342 |
| s=0.3 |
0.316 |
0.294 |
0.338 |
| s=0.5 |
0.339 |
0.313 |
0.366 |
| s=0.6 |
0.349 |
0.319 |
0.378 |
| s=0.7 |
0.360 |
0.337 |
0.382 |
| s=0.8 |
0.391 |
0.350 |
0.432 |
2.4.1 Visualizaing F_ROH

## Models where empirical F_ROH is within CI:
## [1] "s=0.1" "s=0.2"
##
## Models where empirical F_ROH is outside CI:
## [1] "s=0.3" "s=0.4" "s=0.5" "s=0.6" "s=0.7" "s=0.8" "Neutral"
3: Expected Heterozygosity
3.1 Empirical data
3.2 Neutral Model
3.3 Selection Model
## Uncommented because change of analysis
3.4 Causative Variant Window
## Point estimate H_e across all 20 simulations for s01_chr3 : 0.2868376 //n[1] "Bootstrap 95% Confidence Interval: [0.245496723705311, 0.328178482973025]"
## Point estimate H_e across all 20 simulations for s02_chr3 : 0.2675987 //n[1] "Bootstrap 95% Confidence Interval: [0.218489342307825, 0.316708088202673]"
## Point estimate H_e across all 20 simulations for s03_chr3 : 0.2957272 //n[1] "Bootstrap 95% Confidence Interval: [0.248521469186317, 0.342932849088328]"
## Point estimate H_e across all 20 simulations for s04_chr3 : 0.2811599 //n[1] "Bootstrap 95% Confidence Interval: [0.232540572558871, 0.329779132184675]"
## Point estimate H_e across all 20 simulations for s05_chr3 : 0.2581193 //n[1] "Bootstrap 95% Confidence Interval: [0.195799364861395, 0.320439239968004]"
## Point estimate H_e across all 20 simulations for s06_chr3 : 0.2936235 //n[1] "Bootstrap 95% Confidence Interval: [0.230674218749797, 0.356572770587744]"
## Point estimate H_e across all 20 simulations for s07_chr3 : 0.273871 //n[1] "Bootstrap 95% Confidence Interval: [0.213013895772511, 0.33472801978693]"
## Point estimate H_e across all 20 simulations for s08_chr3 : 0.276737 //n[1] "Bootstrap 95% Confidence Interval: [0.226492798388071, 0.326981220882245]"
3.4 Genomewide 5th percentile comparison - Expected Heterozygosity Summary
| Neutral |
0.12797 |
0.12039 |
0.13555 |
| s08_chr3 |
0.12831 |
0.12031 |
0.13632 |
| s01_chr3 |
0.12840 |
0.12246 |
0.13433 |
| s02_chr3 |
0.13008 |
0.12140 |
0.13876 |
| s04_chr3 |
0.13092 |
0.12240 |
0.13943 |
| s03_chr3 |
0.13443 |
0.12843 |
0.14043 |
| s06_chr3 |
0.13551 |
0.12673 |
0.14429 |
| s07_chr3 |
0.13766 |
0.12616 |
0.14916 |
| s05_chr3 |
0.14148 |
0.13333 |
0.14964 |
| Empirical |
NA |
NA |
NA |
4: Summary
4.0 General comparison
4.0.1 ROH-hotspot threshold comparison
##
## ROH-hotspot threshold comparison between the different datasets
| Neutral |
30.5 |
| s=0.1 |
32.2 |
| s=0.2 |
35.1 |
| s=0.3 |
37.3 |
| s=0.4 |
37.9 |
| s=0.6 |
41.4 |
| s=0.7 |
41.8 |
| s=0.5 |
42.6 |
| s=0.8 |
47.6 |
| Empirical |
75.0 |
4.0.2 F_ROH comparison
| Neutral |
0.250 |
0.235 |
0.265 |
| s=0.1 |
0.273 |
0.260 |
0.287 |
| Empirical |
0.275 |
NA |
NA |
| s=0.2 |
0.290 |
0.268 |
0.312 |
| s=0.4 |
0.315 |
0.287 |
0.342 |
| s=0.3 |
0.316 |
0.294 |
0.338 |
| s=0.5 |
0.339 |
0.313 |
0.366 |
| s=0.6 |
0.349 |
0.319 |
0.378 |
| s=0.7 |
0.360 |
0.337 |
0.382 |
| s=0.8 |
0.391 |
0.350 |
0.432 |
4.1: Hotspot comparison
4.1.1: Selection test (Sweep test)
## [1] "Selection test results"
## [1] "ROH-hotspot windows with an mean H_e Value lower or equal to the lower confidence interval of the fifth percentile of the neutral model are classified as being under selection"
## [1] "5th percentile of the neutral model is: 0.1203882"
| Hotspot_chr3_window_1 |
No |
0.12799 |
| Hotspot_chr3_window_3 |
No |
0.13260 |
| Hotspot_chr17_window_2 |
No |
0.14866 |
| Hotspot_chr3_window_2 |
No |
0.15005 |
| Hotspot_chr17_window_1 |
No |
0.15685 |
| Hotspot_chr19_window_1 |
No |
0.17061 |
## [1] "ROH-hotspots under selection:"
4.1.2: Selection Strength Test (Sweep test)
Hotspot - Causative Window - Comparison
| Hotspot_chr3_window_1 |
10.900 |
81.3 |
| s=0.6 |
5.485 |
78.0 |
| s=0.5 |
5.385 |
83.9 |
| s=0.7 |
5.210 |
84.8 |
| s=0.8 |
5.120 |
81.3 |
| s=0.3 |
4.785 |
71.7 |
| s=0.2 |
4.770 |
57.8 |
| s=0.4 |
4.575 |
75.9 |
| s=0.1 |
4.425 |
56.0 |
| Hotspot_chr19_window_1 |
4.400 |
75.6 |
| Hotspot_chr3_window_2 |
3.200 |
76.3 |
| Hotspot_chr3_window_3 |
2.700 |
77.6 |
| Hotspot_chr17_window_1 |
2.000 |
76.4 |
| Hotspot_chr17_window_2 |
0.600 |
76.1 |
Visualizing and scaling

5 Visualizing Expected Heterozygoisty
5.1 Bootstrap CI for mean 5th percentile H_e

## Models where empirical H_e is within CI for Hotspot_chr3_window_1 :
## character(0)
##
## Models where empirical H_e is outside CI for Hotspot_chr3_window_1 :
## character(0)

## Models where empirical H_e is within CI for Hotspot_chr3_window_3 :
## character(0)
##
## Models where empirical H_e is outside CI for Hotspot_chr3_window_3 :
## character(0)

## Models where empirical H_e is within CI for Hotspot_chr17_window_2 :
## character(0)
##
## Models where empirical H_e is outside CI for Hotspot_chr17_window_2 :
## character(0)

## Models where empirical H_e is within CI for Hotspot_chr3_window_2 :
## character(0)
##
## Models where empirical H_e is outside CI for Hotspot_chr3_window_2 :
## character(0)

## Models where empirical H_e is within CI for Hotspot_chr17_window_1 :
## character(0)
##
## Models where empirical H_e is outside CI for Hotspot_chr17_window_1 :
## character(0)

## Models where empirical H_e is within CI for Hotspot_chr19_window_1 :
## character(0)
##
## Models where empirical H_e is outside CI for Hotspot_chr19_window_1 :
## character(0)
5.2 5th percentile of the extreme H_e values